library(tidyverse)
library(lubridate) # Working with dates
library(rstan)
set.seed(1)


Municipality_data <- read.csv("Data/Municipality_data_2021_01_18.csv") %>%
  as_tibble() %>%
  filter(AnnéeDécès != "") %>%
  # Filtering what is actually population data
  filter(AnnéeDécès > 1900) # Filtering out 3954907 MILAN RENEE/     2           1923             6             7 99                              127 SAINT-EUF┬ÉENIA      …        201

source("Scripts/prepare_population_data.R")

selected_years <- c(2015, 2016, 2017, 2019, 2020)

Mortality_data_age_sex <- selected_years %>%
  lapply(function(y) {
    mortality_period_municipality_level_age_sex(start_date = ymd(paste(y, "/03/15", sep = "")), end_date = ymd(paste(y, "/08/13", sep = ""))) %>%
      mutate(year = y)
  }) %>%
  bind_rows() %>%
  mutate(age_class = age_class)

Mortality_data_age_sex_joined <- population_age_structure %>%
  filter(year %in% selected_years) %>%
  rename(Municipality_code = Code) %>%
  left_join(Mortality_data_age_sex) %>%
  mutate(NbDeath = ifelse(test = is.na(NbDeath), yes = 0, no = NbDeath))

Mortality_data_age_sex_joined_corrected <- Mortality_data_age_sex_joined %>%
  mutate(population = mapply(function(x, y) max(x, y), population, NbDeath))

age_sex_specific_IFR <- read_delim("Data/age_sex_specific_mortality_rates.csv", " ") %>%
  as_tibble() %>%
  select(age_group, Male, Female) %>%
  mutate(age_class = age_group_name_age_group_converter(age_group_name = age_group)) %>%
  rename(male = Male, female = Female) %>%
  gather(sex, rho_as, male:female) %>%
  mutate(rho_as = rho_as / 100) # Converting percentages to real fractions

selected_cities <- Mortality_data_age_sex_joined %>%
  filter(year == 2020) %>%
  filter(population_total >= 710) %>% # Filtering which should match the electoral data filtering
  .$Municipality_code

full_data <- left_join(Mortality_data_age_sex_joined_corrected, age_sex_specific_IFR) %>%
  filter(Municipality_code %in% selected_cities) %>%
  mutate(covid = ifelse(year == 2020, yes = 1, no = 0)) %>%
  filter(population > 0) %>%
  filter(age_class > 2) %>%
  filter(age_class < 17) %>%
  mutate(age_class = as.numeric(as.factor(age_class))) %>%
  mutate(municipality_idx = Municipality_code %>% as.factor() %>% as.numeric())

create_municipality_code_converter <- function(Municipality_idx, Municipality_code) {
  dict <- Municipality_code %>%
    unique() %>%
    setNames(Municipality_idx %>% unique())
  conversion_function <- function(x) {
    sapply(x, function(xi) dict[xi])
  }
  return(conversion_function)
}

Municipality_code_converter <- create_municipality_code_converter(full_data$municipality_idx, full_data$Municipality_code)

stan_data <- list(
  N = full_data %>% nrow(),
  Nmunicipalities = full_data$municipality_idx %>% unique() %>% length(),
  Ymtas = full_data$NbDeath,
  Nmtas = full_data$population,
  municipality = full_data$municipality_idx,
  covid = full_data$covid,
  Nage_class = full_data$age_class %>% unique() %>% length(),
  age_class = full_data$age_class,
  age_class_scaled = (full_data$age_class - mean(full_data$age_class)) / 2 * sd(full_data$age_class),
  sex = ifelse(test = full_data$sex == "male", yes = 1, no = 0),
  rho_as = full_data$rho_as,
  Municipality_code_converter = Municipality_code_converter
)

saveRDS(stan_data, file = "Results/stan_data_for_fit_age_sex_2021_01_18_second_round.rds")
